setwd("~/Dropbox/clado-manuscript/Mikes_MS_Data/")
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
# Load biom file.
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
head(sam.data)
## TreatmentGroup Site Date Description
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## SampleID.1
## C172N1 C172N1
## C172N2 C172N2
## C172N3 C172N3
## C172P1 C172P1
## C172P2 C172P2
## C172P3 C172P3
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
head(sam.data); str(sam.data)
## TreatmentGroup Site Date Description
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## SampleID.1 DateSite
## C172N1 C172N1 172 North
## C172N2 C172N2 172 North
## C172N3 C172N3 172 North
## C172P1 C172P1 172 Point
## C172P2 C172P2 172 Point
## C172P3 C172P3 172 Point
## 'data.frame': 52 obs. of 6 variables:
## $ TreatmentGroup: Factor w/ 2 levels "Early","Late": 1 1 1 1 1 1 1 1 1 1 ...
## $ Site : Factor w/ 3 levels "North","Point",..: 1 1 1 2 2 2 3 3 3 1 ...
## $ Date : Factor w/ 6 levels "172","178","185",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ Description : Factor w/ 52 levels "Sample of day 172 at site North 1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ SampleID.1 : Factor w/ 52 levels "C172N1","C172N2",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ DateSite : chr "172 North" "172 North" "172 North" "172 Point" ...
sample_data(biom) <- sam.data
biom; sample_data(biom)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 51928 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 51928 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 51928 tips and 51914 internal nodes ]
## Sample Data: [52 samples by 6 sample variables]:
## TreatmentGroup Site Date Description
## C178N1 Early North 178 Sample of day 178 at site North 1
## C178P1 Early Point 178 Sample of day 178 at site Point 1
## C185P2 Early Point 185 Sample of day 185 at site Point 2
## C206N2 Late North 206 Sample of day 206 at site North 2
## C206P1 Late Point 206 Sample of day 206 at site Point 1
## C206P2 Late Point 206 Sample of day 206 at site Point 2
## C214P1 Late Point 214 Sample of day 214 at site Point 1
## C214P2 Late Point 214 Sample of day 214 at site Point 2
## C214P3 Late Point 214 Sample of day 214 at site Point 3
## C214S1 Late South 214 Sample of day 214 at site South 1
## C214S2 Late South 214 Sample of day 214 at site South 2
## C214S3 Late South 214 Sample of day 214 at site South 3
## C185P1 Early Point 185 Sample of day 185 at site Point 1
## C185P3 Early Point 185 Sample of day 185 at site Point 3
## C199P3 Late Point 199 Sample of day 199 at site Point 3
## C199S2 Late South 199 Sample of day 199 at site South 2
## C199S3 Late South 199 Sample of day 199 at site South 3
## C206N1 Late North 206 Sample of day 206 at site North 1
## C178P2 Early Point 178 Sample of day 178 at site Point 2
## C199N3 Late North 199 Sample of day 199 at site North 3
## C206S1 Late South 206 Sample of day 206 at site South 1
## C214N3 Late North 214 Sample of day 214 at site North 3
## C199N2 Late North 199 Sample of day 199 at site North 2
## C206N3 Late North 206 Sample of day 206 at site North 3
## C206S2 Late South 206 Sample of day 206 at site South 2
## C199N1 Late North 199 Sample of day 199 at site North 1
## C199P1 Late Point 199 Sample of day 199 at site Point 1
## C199S1 Late South 199 Sample of day 199 at site South 1
## C214N1 Late North 214 Sample of day 214 at site North 1
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C199P2 Late Point 199 Sample of day 199 at site Point 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172S3 Early South 172 Sample of day 172 at site South 3
## C178S2 Early South 178 Sample of day 178 at site South 2
## C178P3 Early Point 178 Sample of day 178 at site Point 3
## C178S3 Early South 178 Sample of day 178 at site South 3
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172S1 Early South 172 Sample of day 172 at site South 1
## C178N3 Early North 178 Sample of day 178 at site North 3
## C185N2 Early North 185 Sample of day 185 at site North 2
## C185N3 Early North 185 Sample of day 185 at site North 3
## C185S3 Early South 185 Sample of day 185 at site South 3
## C214N2 Late North 214 Sample of day 214 at site North 2
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C185S2 Early South 185 Sample of day 185 at site South 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## C185N1 Early North 185 Sample of day 185 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C178S1 Early South 178 Sample of day 178 at site South 1
## C185S1 Early South 185 Sample of day 185 at site South 1
## C172S2 Early South 172 Sample of day 172 at site South 2
## C178N2 Early North 178 Sample of day 178 at site North 2
## SampleID.1 DateSite
## C178N1 C178N1 178 North
## C178P1 C178P1 178 Point
## C185P2 C185P2 185 Point
## C206N2 C206N2 206 North
## C206P1 C206P1 206 Point
## C206P2 C206P2 206 Point
## C214P1 C214P1 214 Point
## C214P2 C214P2 214 Point
## C214P3 C214P3 214 Point
## C214S1 C214S1 214 South
## C214S2 C214S2 214 South
## C214S3 C214S3 214 South
## C185P1 C185P1 185 Point
## C185P3 C185P3 185 Point
## C199P3 C199P3 199 Point
## C199S2 C199S2 199 South
## C199S3 C199S3 199 South
## C206N1 C206N1 206 North
## C178P2 C178P2 178 Point
## C199N3 C199N3 199 North
## C206S1 C206S1 206 South
## C214N3 C214N3 214 North
## C199N2 C199N2 199 North
## C206N3 C206N3 206 North
## C206S2 C206S2 206 South
## C199N1 C199N1 199 North
## C199P1 C199P1 199 Point
## C199S1 C199S1 199 South
## C214N1 C214N1 214 North
## C172P1 C172P1 172 Point
## C199P2 C199P2 199 Point
## C172N3 C172N3 172 North
## C172S3 C172S3 172 South
## C178S2 C178S2 178 South
## C178P3 C178P3 178 Point
## C178S3 C178S3 178 South
## C172N1 C172N1 172 North
## C172S1 C172S1 172 South
## C178N3 C178N3 178 North
## C185N2 C185N2 185 North
## C185N3 C185N3 185 North
## C185S3 C185S3 185 South
## C214N2 C214N2 214 North
## C172P2 C172P2 172 Point
## C185S2 C185S2 185 South
## C172P3 C172P3 172 Point
## C185N1 C185N1 185 North
## C172N2 C172N2 172 North
## C178S1 C178S1 178 South
## C185S1 C185S1 185 South
## C172S2 C172S2 172 South
## C178N2 C178N2 178 North
head(otu_table(biom))
## OTU Table: [6 taxa and 52 samples]
## taxa are rows
## C178N1 C178P1 C185P2 C206N2 C206P1 C206P2
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 1
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 2 0 0 6 0 4
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 137 8 1 73 8 6
## C214P1 C214P2 C214P3 C214S1 C214S2 C214S3
## New.CleanUp.ReferenceOTU155901 0 0 1 0 1 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 9 4 11 2 1 0
## JQ945994.1.1399 1 0 0 1 0 0
## EF653577.1.1339 0 0 0 4 0 0
## JQ814729.1.1495 11 4 27 40 8 23
## C185P1 C185P3 C199P3 C199S2 C199S3 C206N1
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 1 0 0 0 0
## KC551585.1.1451 3 2 5 0 0 11
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 6 4 2 11 2 56
## C178P2 C199N3 C206S1 C214N3 C199N2 C206N3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 0 1 4 3 0 3
## JQ945994.1.1399 0 0 2 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 10 23 6 19 5 849
## C206S2 C199N1 C199P1 C199S1 C214N1 C172P1
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 1 0 8 2 5 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 14 24 5 5 3 2
## C199P2 C172N3 C172S3 C178S2 C178P3 C178S3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 2 5 8 0 1 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 2 121 31 0 0 5
## C172N1 C172S1 C178N3 C185N2 C185N3 C185S3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 0 0 1 0 0 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 5 16 4 6 27 4
## C214N2 C172P2 C185S2 C172P3 C185N1 C172N2
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 1 0 0 0
## KC551585.1.1451 3 2 0 2 1 6
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 4 13 1 3 22 21
## C178S1 C185S1 C172S2 C178N2
## New.CleanUp.ReferenceOTU155901 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0
## KC551585.1.1451 0 1 0 3
## JQ945994.1.1399 0 0 3 0
## EF653577.1.1339 0 0 0 0
## JQ814729.1.1495 2 3 21 18
# Custom plotting.
nolegend <- theme(legend.position="none")
readabund <- labs(y="read abundance")
# Normalize by relative abundance.
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Make a list of all genera identified.
genera.relabund <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing=FALSE)
write(genera.relabund, file = "genera-relabund.txt")
# Find all genera
all.genera <- read.table(file = "genera-relabund.txt")
all.genera <- as.vector(all.genera$V1)
#
biom.relabund.all.genera <- subset_taxa(biom.relabund, Genus %in% as.factor(all.genera))
biom.relabund.all.genera
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11769 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 11769 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 11769 tips and 11763 internal nodes ]
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(biom.relabund.all.genera.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Family
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C178S2 0.14728018 Early South 178 Crenotrichaceae
## 2 C206N2 0.07541493 Late North 206 Thermaceae
## 3 C199S1 0.12408425 Late South 199 Crenotrichaceae
## 4 C185S1 0.12728636 Early South 185 Crenotrichaceae
## 5 C199S3 0.10486769 Late South 199 Crenotrichaceae
## 6 C172S1 0.10894857 Early South 172 Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Not working:
# p <- ggplot(biom.relabund.all.genera.genus, aes(as.factor(Date), GenusAbundance, color = Site))
# p <- p + geom_point() + facet_wrap(~Genus, scales="free_y")
# p
biom.relabund.all.genera.genus.est <- summarySE(biom.relabund.all.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(biom.relabund.all.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 A17 3 1.044934e-05 7.240333e-06 4.180208e-06
## 2 North 172 Achromobacter 3 1.205785e-06 2.088482e-06 1.205785e-06
## 3 North 172 Acidaminobacter 3 2.030206e-05 3.516421e-05 2.030206e-05
## 4 North 172 Acidocella 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Acidovorax 3 4.023211e-05 3.508560e-05 2.025668e-05
## 6 North 172 Acinetobacter 3 1.395043e-04 1.803168e-04 1.041059e-04
## ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
biom.relabund.all.genera.genus.est$Date <- as.character(biom.relabund.all.genera.genus.est$Date)
biom.relabund.all.genera.genus.est$Date <- as.numeric(biom.relabund.all.genera.genus.est$Date)
p <- ggplot(biom.relabund.all.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))
